library(dplyr)
library(readr)
library(ggplot2)
library(openxlsx)
library(knitr)
library(tibble)
library(stringr)
library(stringi)
library(readxl)
library(lubridate)
library(shiny)
library(plotly)
# Unzip the ODK export and load the CSV named in params as a plain data frame
df <- as.data.frame(extract_data_from_odk_zip(params$file_path_zip, params$file_name_csv))
# Formatting dates from integer (in ms) to time stamp
df$start <- format_date_ms(df$start)
df$end <- format_date_ms(df$end)
# Keep only events that started after 18th July 2021
df <- subset(df, as.Date(start) > as.Date("18.07.2021", "%d.%m.%Y"))
# Time spent on each event, in seconds. The unit is requested explicitly:
# `end - start` would let difftime pick secs/mins/hours/days from the data.
df$time_spent <- round(as.numeric(difftime(df$end, df$start, units = "secs")))
# Splitting the node strings so that only the question name remains
df$question <- sapply(df$node, create_question)
# decode_question / decode_categories are defined outside this section;
# presumably they add the *_decoded columns from the codebook -- TODO confirm.
df <- decode_question(df, df$question, params$codebook)
df <- decode_categories(df, params$codebook)
df <- df %>%
  # Order rows so that consecutive events of one question are adjacent
  arrange(`instance ID`, node, start) %>%
  # Two empty columns for the derived features.
  # NOTE(review): changed_from is created here but never filled in this section.
  add_column(time_till_change = NA_real_, changed_from = NA)
# Time (in seconds) until an answer was changed: a row counts as a change when
# its old value equals the new value of the directly preceding event of the
# SAME instance and node. Rows without a predecessor (first row, or first event
# of a question) and rows with missing values never match.
prev_new_value <- dplyr::lag(df$`new-value`)
prev_end <- dplyr::lag(df$end)
same_question <- df$`instance ID` == dplyr::lag(df$`instance ID`) &
  df$node == dplyr::lag(df$node)
# which() discards NA comparisons, so no NA ever reaches the assignment index
changed_rows <- which(
  same_question &
    !is.na(df$`old-value`) &
    !is.na(prev_new_value) &
    df$`old-value` == prev_new_value
)
# Explicit units again: a scalar `start - end` of 60 s or more would otherwise
# be expressed in minutes/hours/days and then be misread as seconds.
df$time_till_change[changed_rows] <- round(as.numeric(
  difftime(df$start[changed_rows], prev_end[changed_rows], units = "secs")
))
head(df)
| instance ID | event | node | start | end | latitude | longitude | accuracy | old-value | new-value | time_spent | question | question_decoded | new_value_decoded | old_value_decoded | time_till_change | changed_from |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uuid:004b1139-1d2d-4e11-af7b-b6bc49d84656 | group questions | /data/a1 | 2021-09-15 15:04:55 | 2021-09-15 15:08:10 | NA | NA | NA | NA | NA | 195 | a1 | NA | NA | NA | NA | NA |
| uuid:004b1139-1d2d-4e11-af7b-b6bc49d84656 | group questions | /data/a1 | 2021-09-15 20:32:22 | 2021-09-15 20:32:24 | NA | NA | NA | NA | NA | 2 | a1 | NA | NA | NA | NA | NA |
| uuid:004b1139-1d2d-4e11-af7b-b6bc49d84656 | question | /data/a1/a1_a_4 | 2021-09-15 15:04:55 | 2021-09-15 15:08:10 | NA | NA | NA | NA | S-F0059-P0047 | 195 | a1_a_4 | Please scan the participant’s QR code | S-F0059-P0047 | NA | NA | NA |
| uuid:004b1139-1d2d-4e11-af7b-b6bc49d84656 | group questions | /data/b1 | 2021-09-15 15:04:51 | 2021-09-15 15:04:55 | NA | NA | NA | NA | NA | 4 | b1 | NA | NA | NA | NA | NA |
| uuid:004b1139-1d2d-4e11-af7b-b6bc49d84656 | group questions | /data/b1 | 2021-09-15 20:32:19 | 2021-09-15 20:32:22 | NA | NA | NA | NA | NA | 3 | b1 | NA | NA | NA | NA | NA |
| uuid:004b1139-1d2d-4e11-af7b-b6bc49d84656 | group questions | /data/b2 | 2021-09-15 15:10:39 | 2021-09-15 15:11:25 | NA | NA | NA | NA | NA | 46 | b2 | NA | NA | NA | NA | NA |
# Per-column summary of the prepared event data frame
summary(df)
## instance ID event node
## Length:11469 Length:11469 Length:11469
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## start end latitude
## Min. :2021-08-10 10:49:27 Min. :2021-08-10 10:50:12 Mode:logical
## 1st Qu.:2021-08-27 12:57:02 1st Qu.:2021-08-27 12:49:12 NA's:11469
## Median :2021-09-08 14:56:58 Median :2021-09-08 14:32:27
## Mean :2021-09-07 23:16:08 Mean :2021-09-07 22:09:09
## 3rd Qu.:2021-09-21 12:10:17 3rd Qu.:2021-09-21 12:11:09
## Max. :2021-09-29 19:45:51 Max. :2021-09-29 19:45:51
## NA's :1327
## longitude accuracy old-value new-value
## Mode:logical Mode:logical Length:11469 Length:11469
## NA's:11469 NA's:11469 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## time_spent question question_decoded new_value_decoded
## Min. : 0.00 Length:11469 Length:11469 Length:11469
## 1st Qu.: 17.00 Class :character Class :character Class :character
## Median : 51.00 Mode :character Mode :character Mode :character
## Mean : 71.62
## 3rd Qu.: 86.00
## Max. :5977.00
## NA's :1327
## old_value_decoded time_till_change changed_from
## Length:11469 Min. : 1.00 Mode:logical
## Class :character 1st Qu.: 2.00 NA's:11469
## Mode :character Median : 7.00
## Mean :11.17
## 3rd Qu.:12.00
## Max. :45.00
## NA's :11400
# Headline figures for the report: number of distinct instances, number of
# event rows, and the first start date / last non-missing end date.
no_inst <- n_distinct(df$`instance ID`)
no_event <- nrow(df)
earliest_start <- as.Date(min(df$start))
latest_end <- as.Date(max(df$end, na.rm = TRUE))
Total number of instances: 203
Total number of events/questions: 11469
Examination period: 2021-08-10 - 2021-09-29
# Number of events/questions started on each calendar day
df_by_day <- df %>%
  mutate(start_date = as.Date(start)) %>%
  count(start_date, name = "count")
# Daily counts as a line, overlaid with a red loess smoother (no confidence band)
gg1 <- ggplot(df_by_day, aes(x = start_date, y = count)) +
  geom_line() +
  geom_smooth(alpha = 0.5, colour = "red", method = "loess", se = FALSE) +
  labs(
    title = "Number of Events/Questions Started by Day with Smoothed Regression Line",
    y = "Number of Questions/Events Started",
    x = "Start Date"
  ) +
  theme_light()
gg1
# Minimal look for the heatmap: no grid, border, ticks, x-axis title or legend
theme_heatmap <- theme_light() +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 8),
    plot.title = element_text(face = "bold", size = 11, hjust = 0.5)
  )
# Count started events per weekday label (week_start = 1) and hour of day
df_wday_hour <- df %>%
  mutate(
    wday = wday(start, label = TRUE, week_start = 1),
    hour = hour(start)
  ) %>%
  count(wday, hour, name = "count_wday_hour") %>%
  arrange(desc(wday))
# Tile per weekday/hour, coloured and labelled by count; hours run top to bottom
hour_breaks <- 23:0
gg2 <- ggplot(
  df_wday_hour,
  aes(x = wday, y = hour, fill = count_wday_hour)
) +
  geom_tile(colour = "white") +
  scale_fill_gradient(low = "#fff0f0", high = "#940606") +
  scale_y_reverse(breaks = hour_breaks, labels = hour_breaks, expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0), position = "top") +
  labs(
    title = "Number of Started Events/Questions by Day of Week / Hour of Day",
    y = "Hour of Day"
  ) +
  geom_text(aes(label = count_wday_hour), size = 2) +
  theme_heatmap
gg2
# Keep rows strictly below the 95th percentile of time_spent; which() also
# drops rows whose time_spent is missing, as subset() would.
time_spent_p95 <- quantile(df$time_spent, 0.95, na.rm = TRUE)
df_clean <- df[which(df$time_spent < time_spent_p95), ]
# Histogram of the remaining durations, converted from seconds to minutes
minutes_spent <- df_clean$time_spent[!is.na(df_clean$time_spent)] / 60
hist(
  minutes_spent,
  breaks = 20,
  xlab = "Time Spent in Minutes",
  main = "Histogram of the Time Spent by Question"
)
# Median time spent per decoded question, longest first, shown as a period.
# time_spent is missing for events without an end timestamp; na.rm = TRUE keeps
# a single missing value from turning a question's median into NA.
df_median_time_per_question <- df %>%
  filter(event == "question") %>%
  group_by(question_decoded) %>%
  summarise(median_time_spent = median(time_spent, na.rm = TRUE)) %>%
  arrange(desc(median_time_spent)) %>%
  mutate(median_time_spent = round(seconds_to_period(median_time_spent)))
df_median_time_per_question
| question_decoded | median_time_spent |
|---|---|
| Please specify. (j2) What is the main reason for you to choose coming here today rather than going to the closest facility?) | 4M 41S |
| NA | 2M 3S |
| If QR code scanning is not possible, please manually enter the participant identification code | 1M 56S |
| Would you recommend this facility to a friend / family with a sick child? | 1M 14S |
| Did you feel the provider treated you and the child with respect? | 1M 14S |
| Did you find the provider was kind to you? | 1M 14S |
| Did the provider speak in a language you understand? | 1M 13S |
| Did you find the provider showed concern and empathy? | 1M 13S |
| How do you feel overall with the service you received at the facility today? | 1M 13S |
| Was the service delayed or were you kept waiting for a long time? | 1M 13S |
| Did you miss work to bring the child to the facility today? | 1M 4S |
| Did you pay for something at the facility today? | 1M 4S |
| Is this facility the closest health facility to your home? | 1M 4S |
| Do you intend to buy some medicines outside of the facility? | 1M 2S |
| Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? | 56S |
| What do you intend to do if the sick child does not get completely better or become worse? | 56S |
| Were you given general information or advice about feeding or breastfeeding? | 56S |
| Were you given a paper or record to take with you for completing the referral? | 45S |
| Were you told where to go? | 45S |
| Were you told why to go? | 45S |
| What do you intend to do now? | 45S |
| When do you need to complete the referral? | 45S |
| Did the provider use the device that is represented in the following picture during the consultation of the child? | 42S |
| Can you specify these signs and symptoms? | 38S |
| Can you show me all the medicines and prescriptions that you received? | 37S |
| Did the provider explain to you how to give these medicines to the child at home? | 37S |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | 37S |
| Can you specify the estimated amount of money you spent on treatment for the child (including medicines)? | 36S |
| What did you pay for? | 36S |
| Please scan the participant’s QR code | 30S |
| Can you explain to me why this device was used? | 28S |
| Did the provider explain to you the result that was given by the device? | 27S |
| How many work days did you miss as the result of this visit? | 26S |
| Did the provider tell you what illness your child has? | 24S |
| Did the provider give or prescribe any medicines for the child to take home? | 24S |
| Did the provider refer the child? | 23S |
| Can you specify the estimated amount you paid for the consultation? | 21S |
| Did the provider use a tablet like this one for the consultation of the child? | 18S |
| Please select the current district | 8S |
| fcode | 8S |
# Per decoded question: how often an answer was changed, plus the median and
# standard deviation of the time until the change. Questions with a single
# change are dropped; the rest are listed by number of changes, descending.
df_changes_per_question <- df %>%
  filter(event == "question" & !is.na(time_till_change)) %>%
  group_by(question_decoded) %>%
  summarise(
    count_input_changes = n(),
    median_time_till_change = median(time_till_change),
    sd_time_till_change = sd(time_till_change)
  ) %>%
  filter(count_input_changes > 1) %>%
  arrange(desc(count_input_changes)) %>%
  mutate(
    median_time_till_change = round(seconds_to_period(median_time_till_change)),
    sd_time_till_change = round(seconds_to_period(sd_time_till_change), 1)
  )
df_changes_per_question
| question_decoded | count_input_changes | median_time_till_change | sd_time_till_change |
|---|---|---|---|
| NA | 15 | 3S | 15.4S |
| Do you intend to buy some medicines outside of the facility? | 6 | 22S | 17.4S |
| Did the provider refer the child? | 5 | 9S | 9.7S |
| Were you given general information or advice about feeding or breastfeeding? | 5 | 6S | 13.8S |
| Did you pay for something at the facility today? | 4 | 17S | 11.8S |
| How do you feel overall with the service you received at the facility today? | 4 | 6S | 3.3S |
| Can you show me all the medicines and prescriptions that you received? | 3 | 7S | 7S |
| Did the provider explain to you how to give these medicines to the child at home? | 3 | 3S | 2.6S |
| Did the provider give or prescribe any medicines for the child to take home? | 3 | 8S | 3.1S |
| Please scan the participant’s QR code | 3 | 1S | 14.4S |
| Was the service delayed or were you kept waiting for a long time? | 3 | 2S | 11.5S |
| Did the provider speak in a language you understand? | 2 | 6S | 4.9S |
| Did you find the provider showed concern and empathy? | 2 | 16S | 9.2S |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | 2 | 20S | 18.4S |
| Is this facility the closest health facility to your home? | 2 | 2S | 0.7S |
# For every changed answer, count how often each (question, old value,
# new value) combination occurs; keep combinations seen more than once,
# most frequent first.
df_stream <- df %>%
  filter(!is.na(time_till_change)) %>%
  count(
    question_decoded, old_value_decoded, new_value_decoded,
    name = "count_value_pairs"
  ) %>%
  arrange(desc(count_value_pairs)) %>%
  filter(count_value_pairs > 1)
df_stream
| question_decoded | old_value_decoded | new_value_decoded | count_value_pairs |
|---|---|---|---|
| NA | 1 | 2 | 7 |
| Do you intend to buy some medicines outside of the facility? | Yes, in addition to the medicines prescribed by the healthcare provider | Yes, prescribed by the healthcare provider but not available at the facility | 5 |
| Did the provider refer the child? | Yes | No | 4 |
| Did the provider give or prescribe any medicines for the child to take home? | No | Yes | 3 |
| Did you pay for something at the facility today? | No | Yes | 3 |
| How do you feel overall with the service you received at the facility today? | Somewhat satisfied | Very satisfied | 3 |
| Did the provider explain to you how to give these medicines to the child at home? | Yes, but only for some medicines | Yes, for all medicines | 2 |
| Did the provider speak in a language you understand? | Agree | Strongly agree | 2 |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | Quite confident | Very confident | 2 |
| NA | 2 | 3 | 2 |
# Total duration per instance (first start to last end) for the instances
# above the 90th percentile, longest first.
# The difference is requested in seconds explicitly: `max(end) - min(start)`
# lets difftime choose its unit per group, while seconds_to_period() below
# interprets the bare number as seconds.
df_duration_per_inst <- df %>%
  group_by(`instance ID`) %>%
  summarise(
    duration_per_inst = difftime(
      max(end, na.rm = TRUE),
      min(start, na.rm = TRUE),
      units = "secs"
    )
  ) %>%
  filter(duration_per_inst > quantile(duration_per_inst, 0.9, na.rm = TRUE)) %>%
  mutate(duration_per_inst = round(seconds_to_period(duration_per_inst))) %>%
  arrange(desc(duration_per_inst))
df_duration_per_inst
| instance ID | duration_per_inst |
|---|---|
| uuid:ad9ff8fc-71ab-41d0-ab73-ad6c19b85e21 | 4d 5H 24M 11S |
| uuid:a0162dea-0b24-4b82-964b-faf749585e19 | 11H 45M 51S |
| uuid:5166eb59-6980-41ac-85a4-2f55e54fcc75 | 11H 20M 48S |
| uuid:f53d4292-35c8-449a-b8ca-d2295ff7f42b | 10H 34M 38S |
| uuid:499acd3d-14f3-49a7-be2a-c1a25204faf3 | 10H 27M 44S |
| uuid:1837fdf4-46f7-48e0-90f0-c4095b919336 | 10H 7M 6S |
| uuid:00b87d8e-6538-4803-b102-57f05b7e71bf | 8H 52M 30S |
| uuid:33bf0b7e-142b-42ce-80ba-39668983c516 | 8H 35M 27S |
| uuid:e1c33f29-bb3f-4c84-b225-e604a23671a3 | 8H 26M 11S |
| uuid:e14cb881-07fc-4e3d-a1fa-d361fa78537c | 8H 25M 17S |
| uuid:3dbd438b-a269-47cf-8e78-7c3958ed92a9 | 8H 14M 45S |
| uuid:7ca0683e-1f7d-4d70-9457-72fa8a36c94a | 8H 0M 23S |
| uuid:05162842-0c42-4ed8-9011-a6329b4a081f | 7H 46M 33S |
| uuid:8b343179-1fc7-4bec-9ab8-8f9f15a5caa7 | 7H 31M 18S |
| uuid:dfc113d7-e7c8-4046-b823-cd0019b2d235 | 7H 23M 31S |
| uuid:9c5f66fa-da91-4745-aa41-0cd7056d6d9e | 7H 20M 43S |
| uuid:0875a793-8f50-4c5b-b0a8-cb516bfec204 | 6H 56M 36S |
| uuid:99d5b430-b7b1-47eb-aead-1043688049a3 | 6H 53M 42S |
| uuid:d5fa0e3e-abeb-4a8c-9b66-2d8d860a4c73 | 6H 27M 10S |
| uuid:dd3b85dd-d246-4b5e-809f-6c3371b5e0ca | 6H 15M 55S |
| uuid:dc149e10-8a98-44c8-bba7-f45da91a7f23 | 6H 8M 3S |
# Duration per instance for the instances below the 90th percentile.
# The duration is computed in seconds explicitly so that the division by 60
# below really yields minutes, whatever unit difftime would pick on its own.
df_subsetted <- df %>%
  group_by(`instance ID`) %>%
  summarise(
    duration_per_inst = difftime(
      max(end, na.rm = TRUE),
      min(start, na.rm = TRUE),
      units = "secs"
    )
  ) %>%
  filter(duration_per_inst < quantile(duration_per_inst, 0.9, na.rm = TRUE))
hist(
  as.numeric(df_subsetted$duration_per_inst / 60),
  breaks = 30,
  main = "Duration per Instance in Minutes (outliers removed)",
  xlab = "Duration in Minutes"
)
# Answer changes whose time_till_change lies above the 90th percentile,
# slowest first, reduced to the identifying columns and shown as a period.
df_time_till_change_outliers <- df %>%
  filter(time_till_change > quantile(time_till_change, 0.9, na.rm = TRUE)) %>%
  arrange(desc(time_till_change)) %>%
  select(
    `instance ID`,
    question_decoded,
    old_value_decoded,
    new_value_decoded,
    time_till_change
  ) %>%
  mutate(time_till_change = round(seconds_to_period(time_till_change)))
df_time_till_change_outliers
| instance ID | question_decoded | old_value_decoded | new_value_decoded | time_till_change |
|---|---|---|---|---|
| uuid:efe02baa-d5a2-4c57-a4dd-fbdc9cf6f527 | NA | 2 | 3 | 45S |
| uuid:efe02baa-d5a2-4c57-a4dd-fbdc9cf6f527 | NA | 1 | 2 | 45S |
| uuid:ae6f20a8-9f17-4905-8c34-0f5fc4f0e9a1 | Do you intend to buy some medicines outside of the facility? | Yes, in addition to the medicines prescribed by the healthcare provider | Yes, prescribed by the healthcare provider but not available at the facility | 39S |
| uuid:f9c30136-b8f9-450b-97ff-761999fa3967 | Do you intend to buy some medicines outside of the facility? | Yes, in addition to the medicines prescribed by the healthcare provider | Yes, prescribed by the healthcare provider but not available at the facility | 38S |
| uuid:f43b1362-d79a-4291-b9ba-bac28905f919 | Were you given general information or advice about feeding or breastfeeding? | Guidance on feeding, Guidance on breastfeeding, Advice to continue breastfeeding | Guidance on feeding, Advice to continue breastfeeding | 35S |
| uuid:2dc277ee-7674-4e00-a7c1-627e0f9dbd21 | How confident do you feel in how much of the medication to give each day and how many days to give it? | Quite confident | Very confident | 33S |
| uuid:db2649d4-12d9-4568-be8d-b636ff2e9c95 | Did you pay for something at the facility today? | Yes | No | 32S |
# Flag instances with an inconsistent filling behaviour: the start times of
# each instance are cut into 10 intervals over that instance's own time range,
# and the instance is flagged when its events fall into fewer than 5 of them.
instance_ids <- unique(df$`instance ID`)
# One logical per instance; vapply avoids growing a vector inside a loop
is_irregular <- vapply(
  instance_ids,
  function(id) {
    bin_vec <- cut(df$start[df$`instance ID` == id], breaks = 10, labels = FALSE)
    length(unique(bin_vec)) < 5
  },
  logical(1),
  USE.NAMES = FALSE
)
irregular_inst <- instance_ids[is_irregular]
paste0(length(irregular_inst), " out of ", length(instance_ids), " instances were found to have an inconsistent filling behaviour.")
## [1] "93 out of 203 instances were found to have an inconsistent filling behaviour."
# For every irregular instance: cut its start times into 10 labelled parts,
# add the distribution over the parts as one overlaid histogram trace, and
# remember which questions fell into the last part.
part_labels <- paste0(1:10, ". Part")
last_bin_parts <- vector("list", length(irregular_inst))
fig <- plot_ly(alpha = 0.1)
for (k in seq_along(irregular_inst)) {
  inst_id <- irregular_inst[k]
  inst_df <- df[df$`instance ID` == inst_id, ]
  inst_df$cut <- cut(inst_df$start, breaks = 10, labels = part_labels)
  fig <- add_histogram(fig, x = inst_df$cut, name = inst_id)
  last_bin_parts[[k]] <- inst_df$question_decoded[inst_df$cut == "10. Part"]
}
last_bin_questions <- unlist(last_bin_parts, use.names = FALSE)
fig <- layout(fig, barmode = "overlay")
fig
# Frequency of each question among the last-part events, most frequent first.
# table() is called on the variable directly so the column keeps its name.
last_bin_freq <- as.data.frame(table(last_bin_questions))
kable(arrange(last_bin_freq, desc(Freq)))
| last_bin_questions | Freq |
|---|---|
| Do you intend to buy some medicines outside of the facility? | 8 |
| Did you miss work to bring the child to the facility today? | 5 |
| Did you pay for something at the facility today? | 5 |
| Is this facility the closest health facility to your home? | 5 |
| Was the service delayed or were you kept waiting for a long time? | 5 |
| Can you show me all the medicines and prescriptions that you received? | 3 |
| Can you specify these signs and symptoms? | 3 |
| Did the provider explain to you how to give these medicines to the child at home? | 3 |
| Did the provider speak in a language you understand? | 3 |
| Did you feel the provider treated you and the child with respect? | 3 |
| Did you find the provider showed concern and empathy? | 3 |
| Did you find the provider was kind to you? | 3 |
| How do you feel overall with the service you received at the facility today? | 3 |
| Would you recommend this facility to a friend / family with a sick child? | 3 |
| Did the provider use a tablet like this one for the consultation of the child? | 2 |
| fcode | 2 |
| Please select the current district | 2 |
| Were you given general information or advice about feeding or breastfeeding? | 2 |
| Can you explain to me why this device was used? | 1 |
| Did the provider give or prescribe any medicines for the child to take home? | 1 |
| Did the provider refer the child? | 1 |
| Did the provider tell you what illness your child has? | 1 |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | 1 |
| Please scan the participant’s QR code | 1 |
| Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? | 1 |
| What do you intend to do if the sick child does not get completely better or become worse? | 1 |